{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "1cd297d4",
   "metadata": {},
   "source": [
    "# Lesson 5: Understanding your Google Cloud Footprint"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "8a6f14f7-f9af-4e17-8dc1-1bea92847d53",
   "metadata": {
    "height": 47
   },
   "outputs": [],
   "source": [
    "from helper import authenticate\n",
    "CREDENTIALS, PROJECT_ID = authenticate()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "7b2f2ca4-7058-4b08-9c95-04c8b307de18",
   "metadata": {
    "height": 30
   },
   "outputs": [],
   "source": [
    "from google.cloud import bigquery"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "e23a8578-4940-4f64-a2ce-f2b844b28800",
   "metadata": {
    "height": 30
   },
   "outputs": [],
   "source": [
    "import pandas as pd"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "5bce5027",
   "metadata": {},
   "source": [
    "* A function to export the dataset as pandas data frame."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "447040c7-537c-4ee8-9050-df482caf5ada",
   "metadata": {
    "height": 285
   },
   "outputs": [],
   "source": [
    "def run_bq_query(sql):\n",
    "\n",
    "    bq_client = bigquery.Client(\n",
    "        project = PROJECT_ID,\n",
    "        credentials = CREDENTIALS)\n",
    "\n",
    "    job_config = bigquery.QueryJobConfig()\n",
    "    client_result = bq_client.query(\n",
    "        sql,\n",
    "        job_config=job_config)\n",
    "\n",
    "    job_id = client_result.job_id\n",
    "    \n",
    "    df = client_result.result().to_arrow().to_pandas()\n",
    "    print(f\"Finished job_id: {job_id}\")\n",
    "    return df"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "db209949",
   "metadata": {},
   "source": [
    "* Define the query."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "bf6b4053-67d3-4ff2-a3c5-0de0898a94d3",
   "metadata": {
    "height": 81
   },
   "outputs": [],
   "source": [
    "query = f\"\"\"\n",
    "SELECT * from `sc-gcp-c5-carbon-emissions.carbonfootprint.sample_data`\n",
    "LIMIT 5\n",
    "\"\"\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "b576f6a4-e6cb-42a4-b5b2-fd1c4a70faaa",
   "metadata": {
    "height": 30
   },
   "outputs": [],
   "source": [
    "sample_df = run_bq_query(query)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "54e97033-be31-4f41-bff0-6e48d2365f91",
   "metadata": {
    "height": 47
   },
   "outputs": [],
   "source": [
    "# Print the dataframe\n",
    "sample_df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "262cfb30-98fb-4037-a26d-71f18f8592a0",
   "metadata": {
    "height": 30
   },
   "outputs": [],
   "source": [
    "print(sample_df['carbon_footprint_kgCO2e'][0])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "bc3ddeb9-c389-4110-9185-7ae36c41c977",
   "metadata": {
    "height": 30
   },
   "outputs": [],
   "source": [
    "print(sample_df['service'][0])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "0b381ab0-edde-4426-8556-8edca20edb37",
   "metadata": {
    "height": 30
   },
   "outputs": [],
   "source": [
    "print(sample_df['carbon_footprint_total_kgCO2e'][0])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "b7983887-d6be-46bc-a973-f7511f10e019",
   "metadata": {
    "height": 81
   },
   "outputs": [],
   "source": [
    "# Calculate total carbon footprint\n",
    "sample_df['carbon_footprint_kgCO2e'][0]['scope1'] + \\\n",
    "sample_df['carbon_footprint_kgCO2e'][0]['scope2']['location_based'] +\\\n",
    "sample_df['carbon_footprint_kgCO2e'][0]['scope3']"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "82b7d67d",
   "metadata": {},
   "source": [
    "* More examples of query."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "d51d3e4e-8df0-4e6d-9273-81a04f4797b2",
   "metadata": {
    "height": 115
   },
   "outputs": [],
   "source": [
    "# Select from a specific service, in this case BigQuery\n",
    "query = f\"\"\"\n",
    "SELECT SUM(carbon_footprint_kgCO2e.scope2.location_based)\n",
    "FROM `sc-gcp-c5-carbon-emissions.carbonfootprint.sample_data`\n",
    "WHERE service.description = \"BigQuery\"\n",
    "\"\"\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "c14abb2a-4d20-4da3-9632-8e43b0d6f671",
   "metadata": {
    "height": 30
   },
   "outputs": [],
   "source": [
    "df = run_bq_query(query)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "af07f769-db76-499c-b9a5-2b021306ec3d",
   "metadata": {
    "height": 47
   },
   "outputs": [],
   "source": [
    "# Print\n",
    "df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "b8d1687b-22cc-4188-8c13-445d958a0eb6",
   "metadata": {
    "height": 200
   },
   "outputs": [],
   "source": [
    "# Select specific column values\n",
    "query = f\"\"\"\n",
    "SELECT\n",
    "    usage_month,\n",
    "    service.description,\n",
    "    location.location,\n",
    "    carbon_footprint_total_kgCO2e.location_based\n",
    "FROM `sc-gcp-c5-carbon-emissions.carbonfootprint.sample_data`\n",
    "WHERE project.number = 11111\n",
    "ORDER BY usage_month, service.description\n",
    "\"\"\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "7e855b7a-49bf-479b-be8e-89714572cb25",
   "metadata": {
    "height": 30
   },
   "outputs": [],
   "source": [
    "df = run_bq_query(query)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "2fc46855-c992-408b-abe1-a1c57ec407f7",
   "metadata": {
    "height": 47
   },
   "outputs": [],
   "source": [
    "# Print\n",
    "df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "353c1911-744f-4875-940b-4a60e04c5410",
   "metadata": {
    "height": 115
   },
   "outputs": [],
   "source": [
    "# Total amount of emisions from all projects \n",
    "query = f\"\"\"\n",
    "SELECT DISTINCT SUM(carbon_footprint_total_kgCO2e.location_based) as carbon_emissions, project.number\n",
    "FROM `sc-gcp-c5-carbon-emissions.carbonfootprint.sample_data`\n",
    "GROUP BY project.number\n",
    "\"\"\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "216d2b31-32cf-48bf-a429-b83389cf681a",
   "metadata": {
    "height": 30
   },
   "outputs": [],
   "source": [
    "df = run_bq_query(query)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "312e3f79-ebd9-4efd-8d7f-1ec9aea75320",
   "metadata": {
    "height": 47
   },
   "outputs": [],
   "source": [
    "# Print\n",
    "df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "9aa90bfd-e344-4c9a-8d5c-61973b7bd6af",
   "metadata": {
    "height": 81
   },
   "outputs": [],
   "source": [
    "query = f\"\"\"\n",
    "SELECT DISTINCT SUM(carbon_footprint_total_kgCO2e.location_based)\n",
    "FROM `sc-gcp-c5-carbon-emissions.carbonfootprint.sample_data`\n",
    "\"\"\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "99352ee3-4cbd-4630-96ad-223915712b9c",
   "metadata": {
    "height": 30
   },
   "outputs": [],
   "source": [
    "df = run_bq_query(query)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "e342923a-4b4e-4932-9565-a1ca7532f459",
   "metadata": {
    "height": 47
   },
   "outputs": [],
   "source": [
    "# Add the results before to compare the result from the recent query\n",
    "15854.736771 + 12035.135505 + 520.707209"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "aa069ad3-ade4-48ad-85ea-570c10fa5c56",
   "metadata": {
    "height": 30
   },
   "outputs": [],
   "source": [
    "df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "39211ab2-d639-467c-894a-d4fe71b44881",
   "metadata": {
    "height": 30
   },
   "outputs": [],
   "source": [
    "28410/986"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "55dc3c85",
   "metadata": {},
   "source": [
    "* Load data into pandas dataframe."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "f328bd71-d373-429d-90d3-a8feeed8bcc5",
   "metadata": {
    "height": 81
   },
   "outputs": [],
   "source": [
    "query = f\"\"\"\n",
    "SELECT *\n",
    "FROM `sc-gcp-c5-carbon-emissions.carbonfootprint.sample_data`\n",
    "\"\"\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "d50f2274-940e-4442-8f29-bade40f05878",
   "metadata": {
    "height": 30
   },
   "outputs": [],
   "source": [
    "df = run_bq_query(query)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "31082a1f-757c-4cf1-aa4b-3237f9f44fb4",
   "metadata": {
    "height": 47
   },
   "outputs": [],
   "source": [
    "# Print\n",
    "df"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.14"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
